## Warning: Using size for a discrete variable is not advised.
## Warning: Using size for a discrete variable is not advised.
## Warning: Using size for a discrete variable is not advised.
The data set used in this assignment consists of 30 observations and 28 variables. Each observation represents a team. Two of the variables are qualitative and 26 of the variables are quantitative.
The numeric variables in the dataset are measured on different scales. As a consequence, variables with large scales would have a disproportionately large influence on the MDS. For the MDS to be meaningful, the variables should therefore be standardized first.
## initial value 19.856833
## iter 5 value 16.319153
## iter 10 value 16.046215
## final value 15.935476
## converged
library(readxl)
library(MASS)
library(plotly)
library(tidyverse)
# Assignment 1 ----
# Read the olive data from CSV and draw palmitic against oleic, with the
# linoleic column mapped directly (undiscretized) to point colour.
olive <- read.csv("olive.csv")
ggplot(
  data = olive,
  mapping = aes(x = palmitic, y = oleic, color = linoleic)
) +
  geom_point(size = 1)
# 2.1 ----
# Same scatterplot, but linoleic is first cut into four intervals with
# cut_interval(), so colour becomes a discrete scale.
ggplot(
  data = olive,
  mapping = aes(
    x = palmitic,
    y = oleic,
    color = cut_interval(linoleic, n = 4)
  )
) +
  geom_point(size = 1)
# 2.2.1 ----
# Discretized linoleic (four classes) mapped to colour; this is the same
# plot as in 2.1.
ggplot(
  data = olive,
  mapping = aes(
    x = palmitic,
    y = oleic,
    color = cut_interval(linoleic, n = 4)
  )
) +
  geom_point(size = 1)
# 2.2.2 ----
# Discretized linoleic (four classes from cut_interval()) mapped to point
# size. Previously this plot mapped the classes to colour again and only
# enlarged every point to a constant size, so nothing was encoded by size.
# ggplot2 emits "Using size for a discrete variable is not advised" for this
# mapping; that warning is expected.
# NOTE(review): assumed to be the size counterpart of the colour plot in
# 2.2.1 - confirm against the assignment text.
ggplot(
  data = olive,
  mapping = aes(
    x = palmitic,
    y = oleic,
    size = cut_interval(linoleic, n = 4)
  )
) +
  geom_point()
# 2.2.3 ----
# Discretized linoleic mapped to orientation angle with geom_spoke().
# geom_spoke() interprets `angle` in radians, so feeding it raw linoleic
# values lets any value above 2 * pi wrap around the circle and destroys the
# ordering. Instead, each of the four cut_interval() classes gets its own
# evenly spaced angle: 0, pi / 4, pi / 2 and 3 * pi / 4.
# The spoke length is a constant, so it is set as a fixed parameter rather
# than inside aes(); the original value (-100) is kept.
ggplot(data = olive, mapping = aes(x = palmitic, y = oleic)) +
  geom_point(size = 1) +
  geom_spoke(
    mapping = aes(
      angle = (as.integer(cut_interval(linoleic, n = 4)) - 1) * pi / 4
    ),
    radius = -100
  )
# 2.3 ----
# Oleic against eicosenoic, with the Region column mapped to colour as it is
# stored in the data (no conversion applied here).
ggplot(
  data = olive,
  mapping = aes(x = oleic, y = eicosenoic, color = Region)
) +
  geom_point(size = 1)
# 2.3.1 ----
# Oleic against eicosenoic with Region as a discrete colour. factor() gives
# every distinct Region value its own level and colour, whereas binning the
# codes with cut_interval(Region, n = 4) produced arbitrary equal-width
# intervals that can merge regions or leave empty classes.
# NOTE(review): assumes Region is a categorical code - confirm.
ggplot(
  data = olive,
  mapping = aes(x = oleic, y = eicosenoic, color = factor(Region))
) +
  geom_point(size = 1)
# 2.4 ----
# Oleic against eicosenoic with three further variables encoded at once,
# each cut into three intervals: linoleic -> colour (plot level),
# palmitic -> shape and palmitoleic -> size (point layer).
ggplot(
  data = olive,
  mapping = aes(
    x = oleic,
    y = eicosenoic,
    color = cut_interval(linoleic, n = 3)
  )
) +
  geom_point(
    mapping = aes(
      shape = cut_interval(palmitic, n = 3),
      size = cut_interval(palmitoleic, n = 3)
    )
  )
# 2.5 ----
# Same layout as 2.4, except colour now carries Region (as stored) instead
# of discretized linoleic; shape and size still encode three-interval cuts
# of palmitic and palmitoleic.
ggplot(
  data = olive,
  mapping = aes(x = oleic, y = eicosenoic, color = Region)
) +
  geom_point(
    mapping = aes(
      shape = cut_interval(palmitic, n = 3),
      size = cut_interval(palmitoleic, n = 3)
    )
  )
# 2.6 ----
# Pie chart built from the Area column of the olive data. Only `labels` is
# supplied (no `values`), so slice sizes are left to plotly to derive from
# the labels.
# NOTE(review): hoverinfo is "text" but no `text` is supplied, so hovering
# may show nothing - confirm this is intended.
fig <- plot_ly(
  olive,
  labels = ~Area,
  type = "pie",
  textposition = "inside",
  textinfo = "label+percent",
  insidetextfont = list(color = "#FFFFFF"),
  hoverinfo = "text",
  # No `colors` entry here: this script never defines an object named
  # `colors`, so the former `colors = colors` passed the function
  # grDevices::colors instead of a palette. Plotly's default slice colours
  # are used; only the white separator line is configured.
  marker = list(line = list(color = "#FFFFFF", width = 1)),
  showlegend = FALSE
)
# Add the title and hide grid lines, zero lines and tick labels on both axes.
fig <- fig %>%
  layout(
    title = "Proportions of Oils",
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )
fig
# 2.7 ----
# Linoleic against eicosenoic as points, overlaid with 2D density contours.
ggplot(data = olive, mapping = aes(x = linoleic, y = eicosenoic)) +
  geom_point() +
  geom_density_2d()
# 1.1 ----
# Fix the RNG seed and read the baseball data from the Excel workbook.
set.seed(1)
data <- read_xlsx("baseball-2016.xlsx")
# 1.2 ----
# Keep only the numeric columns and standardize them with scale().
# vapply() with a logical(1) template always returns a logical vector, which
# makes it a safe column mask (sapply()'s return type depends on its input).
is_numeric_col <- vapply(data, is.numeric, logical(1))
baseball_numeric <- scale(data[, is_numeric_col])
# Pairwise Minkowski distances with p = 2 between the standardized rows.
d <- dist(x = baseball_numeric, method = "minkowski", p = 2)
# Two-dimensional isoMDS configuration; its progress trace is printed.
res <- isoMDS(d, k = 2)
coords <- res$points
# as.data.frame() on the unnamed coordinate matrix yields columns V1 and V2;
# Team and League are attached for hover text and colouring.
coordsMDS <- as.data.frame(coords)
coordsMDS$Team <- data$Team
coordsMDS$League <- data$League
plot_ly(
  coordsMDS,
  x = ~V1,
  y = ~V2,
  type = "scatter",
  mode = "markers",
  hovertext = ~Team,
  color = ~League,
  colors = c("#377eb8", "#ef553b")
)
# 1.3 ----
# Shepard plot: original dissimilarities (delta) against distances in the
# MDS configuration (D), plus the fitted values returned by Shepard().
sh <- Shepard(d, coords)
delta <- as.numeric(d)
D <- as.numeric(dist(coords))
# Row/column lookup for every pair in the lower triangle, in the same
# column-major order in which a dist object stores its values.
# `index` holds the column number in every cell; its transpose holds the
# row number.
n_teams <- nrow(coords)
index <- matrix(
  seq_len(n_teams),
  nrow = n_teams,
  ncol = n_teams,
  byrow = TRUE
)
index1 <- as.numeric(t(index)[lower.tri(index)])
index2 <- as.numeric(index[lower.tri(index)])
plot_ly() %>%
  add_markers(
    x = ~delta,
    y = ~D,
    hoverinfo = "text",
    text = ~paste(
      "Team 1: ", data$Team[index1],
      "<br> Team 2: ", data$Team[index2]
    )
  ) %>%
  add_lines(x = ~sh$x, y = ~sh$yf)
# 1.4 ----
# Combine the second MDS coordinate with the standardized variables, then
# attach that coordinate to the raw data so it can be plotted against
# individual variables.
data_scatter <- data.frame(coordsMDS$V2, baseball_numeric)
data$V2 <- coordsMDS$V2
# Second MDS coordinate against HR.per.game, coloured by League.
plot_ly(
  data,
  x = ~V2,
  y = ~HR.per.game,
  type = "scatter",
  mode = "markers",
  hovertext = ~Team,
  color = ~League,
  colors = c("#377eb8", "#ef553b")
)
# Second MDS coordinate against the `3B` column, coloured by League.
plot_ly(
  data,
  x = ~V2,
  y = ~`3B`,
  type = "scatter",
  mode = "markers",
  hovertext = ~Team,
  color = ~League,
  colors = c("#377eb8", "#ef553b")
)
Simon and Mohamed devised the whole assignment together, including the main conceptual ideas and the code outline. Mohamed worked out Assignment 1 (Perception in Visualization) and created the report using R Markdown; Simon worked out Assignment 2 (Multidimensional scaling of a high-dimensional dataset) and implemented all code and functions.